library(tidyverse)
library(raster)
library(sf)
source(here::here("Scripts", "funs.R")) # Helper functions
`%nin%` = Negate(`%in%`)

joined1 <- load_adm1()


# Create base grid --------------------------------------------------------

r <- raster(extent(c(-180, 180, -90, 90)),
            resolution = 0.5,
            crs = st_crs(joined1)$proj4string)
r[] <- 1:ncell(r)
grid_sf <- raster_to_sf(r)

grid_sf <-
  grid_sf %>% 
  mutate(geo_id = as.character(geometry)) %>% 
  rename(cell_id = layer)



# Current status ----------------------------------------------------------

# Get file names for current status
cs <- list.files(path = here::here("Data", 
                                   "Raw"), 
                 pattern = "_CS.shp$", 
                 full.names = T)  

# Extract current status files
cs_sf <- map(cs, read_sf)

# File names 
(names_cs <- paste0(substr(cs, 47, 55), "_", "cs"))

# Tabulate regions by year to make sure it lines up
table(substr(names_cs, 1,2), substr(names_cs, 4, 7))


# Clean up values
cs_sf <- cs_sf %>% map(shp_cleaner)


# Drop in the date (yearmonth)
substr(names_cs, 4, 9)
for(i in 1:length(cs_sf)) {
  cs_sf[[i]]$date_code <- substr(names_cs[i], 4, 9)
}

for(i in 1:length(cs_sf)) {
  cs_sf[[i]] <- dplyr::select(cs_sf[[i]], CS, geometry, date_code)
}

# Rasterize and convert to sf
### Note: there are very few cells with multiple values, so the function
### -- used to aggregate (i.e. max) doesn't appear to matter in practice; as rule
### -- taking the max level within cell

cs_sf_grid <- map(cs_sf, ~to_raster(.x, resolution = 0.5, 
                                    extentOf = r, 
                                    field = "CS", fun = "max")) %>% 
  map(., raster_to_sf)

# Drop in the date (yearmonth) and region
for(i in 1:length(cs_sf_grid)) {
  cs_sf_grid[[i]]$date_code <- substr(names_cs[i], 4, 9)
}

for(i in 1:length(cs_sf_grid)) {
  cs_sf_grid[[i]]$region = substr(names_cs[i], 1,2)
}


# Join to grid
cs_sf_grid <- 
  map(cs_sf_grid, ~.x %>% 
        mutate(geo_id = as.character(geometry)) %>% 
        st_drop_geometry() %>% 
        rename(CS = layer) %>% 
        left_join(grid_sf, by = "geo_id") %>% 
        dplyr::select(-geo_id))

# Line up dates using 
cs_grid_time <- map(cs_sf_grid, ~dater(.x, type = "CS"))




# ML2 ----------------------------------------------------------


# Get file names for ML2
ml2 <- list.files(path = here::here("Data", 
                                    "Raw"), 
                  pattern = "_ML2.shp$", 
                  full.names = T)  

# Extract ML2 files
ml2_sf <- map(ml2, read_sf)

# File names 
(names_ml2 <- paste0(substr(ml2, 47, 55), "_", "ml2"))

# Tabulate regions by year to make sure it lines up
table(substr(names_ml2, 1,2), substr(names_ml2, 4,7))

# Check for duplicates
sort(names_ml2)[which(duplicated(sort(names_ml2)))]

# Clean up values
ml2_sf <- ml2_sf %>% map(shp_cleaner)

# Fix bad column names 
map_chr(.x = ml2_sf, ~names(.x)[[1]])
which(map_chr(.x = ml2_sf, ~names(.x)[[1]]) != "ML2")
# names(ml2_sf[[45]])[1] <- "ML2"
names(ml2_sf[[67]])[1] <- "ML2"

table(map_chr(.x = ml2_sf, ~names(.x)[[1]])) # Ok bc ML2 in second column

# Drop in the date (yearmonth)
for(i in 1:length(ml2_sf)) {
  ml2_sf[[i]]$date_code <- substr(names_ml2[i], 4, 9)
}

for(i in 1:length(ml2_sf)) {
  print(i)
  ml2_sf[[i]] <- ml2_sf[[i]] %>% dplyr::select(.,ML2, geometry, date_code)
}

# Rasterize and convert to sf
ml2_sf_grid <- map(ml2_sf, ~to_raster(.x, extentOf = r, 
                                      field = "ML2", 
                                      resolution = 0.5, 
                                      fun = "max")) %>% 
  map(., raster_to_sf)


# Drop in the date (yearmonth) and region
for(i in 1:length(ml2_sf_grid)) {
  ml2_sf_grid[[i]]$date_code <- substr(names_ml2[i], 4, 9)
  
}

for(i in 1:length(ml2_sf_grid)) {
  ml2_sf_grid[[i]]$region = substr(names_ml2[i], 1,2)  
}


ml2_sf_grid <- 
  map(ml2_sf_grid, ~.x %>% 
        mutate(geo_id = as.character(geometry)) %>% 
        st_drop_geometry() %>% 
        rename(ML2 = layer) %>% 
        left_join(grid_sf, by = "geo_id") %>% 
        dplyr::select(-geo_id))

ml2_grid_time <- map(ml2_sf_grid, ~dater(.x, type = "ML2"))




# ML1 ----------------------------------------------------------


# Get file names for ml1
ml1 <- list.files(path = here::here("Data", 
                                    "Raw"), 
                  pattern = "_ML1.shp$", 
                  full.names = T)  

# Extract ml1 files
ml1_sf <- map(ml1, read_sf)

# File names 
(names_ml1 <- paste0(substr(ml1, 47, 55), "_", "ml1"))

# Tabulate regions by year to make sure it lines up
table(substr(names_ml1, 1,2), substr(names_ml1, 4,7))

# Check for duplicates
sort(names_ml1)[which(duplicated(sort(names_ml1)))]

# Clean up values
ml1_sf <- ml1_sf %>% map(shp_cleaner)


# Fix bad column names 
map_chr(.x = ml1_sf, ~names(.x)[[1]])
which(map_chr(.x = ml1_sf, ~names(.x)[[1]]) != "ML1")
# names(ml1_sf[[45]])[1] <- "ML1"
table(map_chr(.x = ml1_sf, ~names(.x)[[1]]))
# Duplicate ML1 column with first misnamed at position 45; leave as is and catch with select below


# Drop in the date (yearmonth)
for(i in 1:length(ml1_sf)) {
  ml1_sf[[i]]$date_code <- substr(names_ml1[i], 4, 9)
}


# Rasterize and convert to sf
ml1_sf_grid <- map(ml1_sf, ~to_raster(.x, extentOf = r, 
                                      field = "ML1", 
                                      resolution = 0.5, 
                                      fun = "max")) %>% 
  map(., raster_to_sf)


# Drop in the date (yearmonth) and region
for(i in 1:length(ml1_sf_grid)) {
  ml1_sf_grid[[i]]$date_code <- substr(names_ml1[i], 4, 9)
  
}

for(i in 1:length(ml1_sf_grid)) {
  ml1_sf_grid[[i]]$region = substr(names_ml1[i], 1,2)  
}


ml1_sf_grid <- 
  map(ml1_sf_grid, ~.x %>% 
        mutate(geo_id = as.character(geometry)) %>% 
        st_drop_geometry() %>% 
        rename(ML1 = layer) %>% 
        left_join(grid_sf, by = "geo_id") %>% 
        dplyr::select(-geo_id))

ml1_grid_time <- map(ml1_sf_grid, ~dater(.x, type = "ML1"))




# Join frames -------------------------------------------------------------

# Bind into single frames and combine into df_grid
ml2_df <- bind_rows(ml2_grid_time) 
ml1_df <- bind_rows(ml1_grid_time) 
cs_df <- bind_rows(cs_grid_time)

head(ml1_df)
head(ml2_df)
head(cs_df)

df_grid <-
  cs_df %>% 
  full_join(ml2_df %>% dplyr::select(ML2, date_ml2, cell_id), 
            by = c("date_cs" = "date_ml2",
                   "cell_id")) %>% 
  full_join(ml1_df %>% dplyr::select(ML1, date_cs, cell_id), 
            by = c("date_cs" = "date_cs",
                   "cell_id")) %>% 
  dplyr::select(everything(), geometry)
  



df_grid <- df_grid %>% st_as_sf()




# Spatial join unique cell_id to ADM1s ------------------------------------

cells <- df_grid %>% dplyr::select(cell_id, geometry)

cells <- cells[-which(duplicated(cells$cell_id)),]


cells <-
  cells %>% 
  st_as_sf() %>% 
  st_set_crs(., st_crs(joined1)) %>% 
  st_join(., joined1, largest = T) %>% 
  dplyr::select(cell_id, NAME_0, NAME_1)

df_grid <- df_grid %>% 
  full_join(st_drop_geometry(cells), by = c("cell_id")) %>% 
  st_as_sf() %>% 
  st_set_crs(., st_crs(joined1))




# Match cells to nearest unit in reports ----------------------------------
### This accounts for cells that border countries that are never in reports ###

# West Africa

# Countries that have appeared in at least one report
ever_west <- c("Burkina Faso", "Mauritania", "Mali", "Niger", "Nigeria", 
               "Chad", "Sierra Leone", "Guinea", "Liberia")

# Subset to grids within countries not in reports
wester <- df_grid %>% filter(region == "WA" & NAME_0 %nin% ever_west)

# Create frame with countries in reports
west1 <- joined1[joined1$NAME_0 %in% ever_west,]

# Unique id for each adm1 in frame
west1 <- 
  west1 %>% arrange() %>% 
  mutate(index = as.numeric(row.names(.)))

# Take grids in wrong countries and identify nearest adm1 within menu of report countries
west_near <- st_nearest_feature(wester, west1)

# Drop in identifier that matches with shapefile ids
wester$index <- west_near
wester_nog <- wester
wester_nog$geometry <- NULL

westerm <-
  wester_nog %>% 
  dplyr::select(index, cell_id, year, month, date_cs, CS, ML2, ML1, region) %>% 
  left_join(west1)
westerm$geometry <- NULL

west1$geometry <- NULL
df_grid_temp <- df_grid
# df_grid_temp$geometry <- NULL

df_west <-
  df_grid_temp %>% filter(region == "WA" & NAME_0 %in% ever_west) %>% 
  dplyr::select(cell_id, CS, ML2,ML1, region, NAME_0, 
                NAME_1, month, year, date_cs) %>% 
  bind_rows(westerm)





# East Africa

# Countries that have appeared in at least one report
ever_east <- c("Sudan", "South Sudan", "Uganda", "Ethiopia", "Tanzania", 
               "Kenya", "Somalia", "Djibouti", "Rwanda", "Yemen")

# Subset to grids within countries not in reports
easter <- df_grid %>% filter(region == "EA" & NAME_0 %nin% ever_east)
# Create shapefile with countries in reports
east1 <- joined1[joined1$NAME_0 %in% ever_east,]

# Unique id for each adm1 in shapefile
east1 <- 
  east1 %>% arrange() %>% 
  mutate(index = as.numeric(row.names(.)))

# Take grids in wrong countries and identify nearest adm1 within menu of report countries
east_near <- st_nearest_feature(easter, east1)

# Drop in identifier that matches with shapefile ids
easter$index <- east_near
easter_nog <- easter
easter_nog$geometry <- NULL

easterm <-
  easter_nog %>% 
  dplyr::select(index, cell_id, year, month, date_cs, CS, ML2, ML1, region) %>% 
  left_join(east1)
easterm$geometry <- NULL

east1$geometry <- NULL
df_grid_temp <- df_grid

df_east <-
  df_grid_temp %>% filter(region == "EA" & NAME_0 %in% ever_east) %>% 
  dplyr::select(cell_id, CS, ML2, ML1, region, NAME_0, 
                NAME_1, month, year, date_cs) %>% 
  bind_rows(easterm)


# Southern Africa 
ever_south <- c("Zambia", "Malawi", "Mozambique", "Zimbabwe",
                "Madagascar", "Democratic Republic of the Congo")

# Subset to grids within countries not in reports
souther <- df_grid %>% filter(region == "SA" & NAME_0 %nin% ever_south)
# Create shapefile with countries in reports
south1 <- joined1[joined1$NAME_0 %in% ever_south,]

# Unique id for each adm1 in shapefile
south1 <- 
  south1 %>% arrange() %>% 
  mutate(index = as.numeric(row.names(.)))

# Take grids in wrong countries and identify nearest adm1 within menu of report countries
south_near <- st_nearest_feature(souther, south1)

# Drop in identifier that matches with shapefile ids
souther$index <- south_near
souther_nog <- souther
souther_nog$geometry <- NULL

southerm <-
  souther_nog %>% 
  dplyr::select(index, cell_id, year, month, date_cs, 
                CS, ML2, ML1, region) %>% 
  left_join(south1)
southerm$geometry <- NULL

south1$geometry <- NULL
df_grid_temp <- df_grid

df_south <-
  df_grid_temp %>% filter(region == "SA" & NAME_0 %in% ever_south) %>% 
  dplyr::select(cell_id, CS, ML2, ML1, region, NAME_0, 
                NAME_1, month, year, date_cs) %>% 
  bind_rows(southerm)




# Bind and export ---------------------------------------------------------

df_grid_clean <- 
  bind_rows(df_west, df_east, df_south)


df_grid_clean <- 
  df_grid_clean %>% 
  mutate(ML2 = case_when(ML2 > 5 ~ NA_real_, TRUE ~ ML2),
         diff = CS - ML2)

df_grid_clean <- df_grid_clean %>% 
  dplyr::select(cell_id, NAME_0, NAME_1, region, CS, 
                ML2, diff, ML1, date_cs, month, year)



save(df_grid_clean, file = here::here("Grids", "df_grid_clean.Rdata"))

